setwd("/Users/leonardocarella/Documents/LSQ_27092023") # change to local wd
`%notin%` <- Negate(`%in%`)
library(tidyverse)
library(readxl)
library(lubridate)
library(lmtest)
library(multiwayvcov)
library(stargazer)
library(sandwich)
library(dfidx)
library(mlogit)
library(DescTools)
library(margins)
library(xtable)

landtag <- readRDS("landtag_18_12.rds")

# code next Landtag election date

landtag$election_date_t_plus1 <- NA

for (i in unique(landtag$state)) {
  
  valid_legislatures <- unique(as.numeric(as.character(landtag$legislature[landtag$state == i])))[unique(
    as.numeric(as.character(landtag$legislature[landtag$state == i]))) != 
      max(unique(as.numeric(as.character(landtag$legislature[landtag$state == i]))))]
  
  for (k in valid_legislatures)
  {
    
    landtag$election_date_t_plus1[landtag$state == i &
                                    as.numeric(as.character(landtag$legislature)) == k] <-
      unique(landtag$election_date[landtag$state == i &
                                     as.numeric(as.character(landtag$legislature)) == k+1] )
  }}

# code whether in landtag at the start/end of each term

landtag$in_landtag_end_term <- 0
landtag$in_landtag_end_term[landtag$election_date_t_plus1 == landtag$legislator_to_date1] <- 1
landtag$in_landtag_end_term[landtag$election_date_t_plus1 == landtag$legislator_to_date2] <- 1
landtag$in_landtag_end_term[is.na(landtag$election_date_t_plus1)] <- NA

landtag$in_landtag_start_term <- 0
landtag$in_landtag_start_term[landtag$election_date == landtag$legislator_from_date1] <- 1

# create ‘target’ dataset of only MPs who were elected to the landtag (excludes substitutes)

landtag_target <- landtag %>%
  filter(in_landtag_start_term == 1 & elected_from != "substitute")

landtag$tier_dummy[landtag$elected_from %in% c("proportional", "substitute")] <- "PR"
landtag$tier_dummy[landtag$elected_from %in% c("SMD")] <- "SMD"

landtag_target$tier_dummy[landtag_target$elected_from %in% c("proportional", "substitute")] <- "PR"
landtag_target$tier_dummy[landtag_target$elected_from %in% c("SMD")] <- "SMD"


# code re_elected (from landtag_target data) and returned (from landtag data)

landtag$re_elected <- NA
landtag$tier_re_election <- NA
landtag$returned <- NA
landtag$tier_return <- NA

for (i in unique(landtag$state)) {
  
  valid_legislatures <- unique(as.numeric(as.character(landtag$legislature[landtag$state == i])))[unique(
    as.numeric(as.character(landtag$legislature[landtag$state == i]))) != 
      max(unique(as.numeric(as.character(landtag$legislature[landtag$state == i]))))]
  
  for (k in valid_legislatures)
  {
    valid_ids <- unique(landtag$id[as.numeric(landtag$legislature) == k &
                                     landtag$state == i])  
    for (j in valid_ids){
      if (j %in% unique(landtag_target$id[as.numeric(landtag_target$legislature) == k+1])){
        landtag$re_elected[landtag$id == j & as.numeric(landtag$legislature) == k] <- 1 }
      
      if (j %notin% unique(landtag_target$id[as.numeric(landtag_target$legislature) == k+1])){
        landtag$re_elected[landtag$id == j & as.numeric(landtag$legislature) == k] <- 0}
      
      if (j %in% unique(landtag$id[as.numeric(landtag$legislature) == k+1])){
        landtag$returned[landtag$id == j & as.numeric(landtag$legislature) == k] <- 1 }
      
      if (j %notin% unique(landtag$id[as.numeric(landtag$legislature) == k+1])){
        landtag$returned[landtag$id == j & as.numeric(landtag$legislature) == k] <- 0}
      
    }
  }
  print(i)
}

#### Descriptive statistics table (first table in Appendix B) ####

# Create variables for table

landtag$age_end_legislature <- time_length(difftime(dmy(landtag$election_date_t_plus1),ymd(landtag$dob)),
                                           unit = "year")

landtag$term_duration <- time_length(difftime(dmy(landtag$election_date_t_plus1),dmy(landtag$election_date)),
                                     unit = "year")

landtag$seniority <- as.numeric(landtag$no_past_terms) + 1

landtag$end_of_term_year <- decimal_date(dmy(landtag$election_date_t_plus1))


# table by state

table1 <- landtag %>%
  dplyr::filter(in_landtag_end_term == 1) %>%
  dplyr::group_by(state, tier_dummy) %>%
  dplyr::rename(variable = state) %>%
  dplyr::filter(!is.na(tier_dummy)) %>%
  dplyr::filter(!is.na(re_elected)) %>%
  dplyr::summarise(share_reelected = mean(re_elected),
                   share_returned = mean(returned),
                   count = n()) %>%
  pivot_wider(names_from = tier_dummy,  values_from = c(share_reelected, share_returned, count))

# table by party

table2 <- landtag %>%
  dplyr::filter(in_landtag_end_term == 1) %>%
  dplyr::group_by(party_elected_short, tier_dummy) %>%
  dplyr::rename(variable = party_elected_short) %>%
  dplyr::filter(!is.na(tier_dummy)) %>%
  dplyr::filter(!is.na(re_elected)) %>%
  dplyr::summarise(share_reelected = mean(re_elected),
                   share_returned = mean(returned),
                   count = n()) %>%
  pivot_wider(names_from = tier_dummy,  values_from = c(share_reelected, share_returned, count))


# table by seniority, age groups, term duration

landtag$sen_cat <- NA
landtag$sen_cat[landtag$seniority == 1] <- "1"
landtag$sen_cat[landtag$seniority == 2] <- "2"
landtag$sen_cat[landtag$seniority %in% c(3,4)] <- "3"
landtag$sen_cat[landtag$seniority > 4] <- "4"

landtag$age_cat <- NA
landtag$age_cat[landtag$age_end_legislature < 40] <- "1"
landtag$age_cat[landtag$age_end_legislature >= 40 & landtag$age_end_legislature < 50] <- "2"
landtag$age_cat[landtag$age_end_legislature >= 50 & landtag$age_end_legislature < 60] <- "3"
landtag$age_cat[landtag$age_end_legislature >= 60] <- "4"

landtag$term_duration_cat <- NA
landtag$term_duration_cat[landtag$term_duration < 3] <- "1"
landtag$term_duration_cat[landtag$term_duration >= 3 & landtag$term_duration < 4] <- "2"
landtag$term_duration_cat[landtag$term_duration >= 4 & landtag$term_duration < 5] <- "3"
landtag$term_duration_cat[landtag$term_duration >= 5] <- "4"

table3 <- landtag %>%
  dplyr::filter(in_landtag_end_term == 1) %>%
  dplyr::group_by(sen_cat, tier_dummy) %>%
  dplyr::rename(variable = sen_cat) %>%
  dplyr::filter(!is.na(tier_dummy)) %>%
  dplyr::filter(!is.na(re_elected)) %>%
  dplyr::summarise(share_reelected = mean(re_elected),
                   share_returned = mean(returned),
                   count = n()) %>%
  pivot_wider(names_from = tier_dummy,  values_from = c(share_reelected, share_returned, count))

table4 <- landtag %>%
  dplyr::filter(in_landtag_end_term == 1) %>%
  dplyr::group_by(age_cat, tier_dummy) %>%
  dplyr::filter(!is.na(age_cat)) %>%
  dplyr::rename(variable = age_cat) %>%
  dplyr::filter(!is.na(tier_dummy)) %>%
  dplyr::filter(!is.na(re_elected)) %>%
  dplyr::summarise(share_reelected = mean(re_elected),
                   share_returned = mean(returned),
                   count = n()) %>%
  pivot_wider(names_from = tier_dummy,  values_from = c(share_reelected, share_returned, count))

table5 <- landtag %>%
  dplyr::filter(in_landtag_end_term == 1) %>%
  dplyr::group_by(term_duration_cat, tier_dummy) %>%
  dplyr::filter(!is.na(term_duration_cat)) %>%
  dplyr::rename(variable = term_duration_cat) %>%
  dplyr::filter(!is.na(tier_dummy)) %>%
  dplyr::filter(!is.na(re_elected)) %>%
  dplyr::summarise(share_reelected = mean(re_elected),
                   share_returned = mean(returned),
                   count = n()) %>%
  pivot_wider(names_from = tier_dummy,  values_from = c(share_reelected, share_returned, count))

table6 <- landtag %>%
  dplyr::filter(in_landtag_end_term == 1) %>%
  dplyr::group_by(gender, tier_dummy) %>%
  dplyr::filter(!is.na(gender)) %>%
  dplyr::rename(variable = gender) %>%
  dplyr::filter(!is.na(tier_dummy)) %>%
  dplyr::filter(!is.na(re_elected)) %>%
  dplyr::summarise(share_reelected = mean(re_elected),
                   share_returned = mean(returned),
                   count = n()) %>%
  pivot_wider(names_from = tier_dummy,  values_from = c(share_reelected, share_returned, count))

table7 <- landtag %>%
  dplyr::filter(in_landtag_end_term == 1) %>%
  dplyr::group_by(tier_dummy) %>%
  dplyr::filter(!is.na(tier_dummy)) %>%
  dplyr::filter(!is.na(re_elected)) %>%
  dplyr::summarise(share_reelected = mean(re_elected),
                   share_returned = mean(returned),
                   count = n()) %>%
  pivot_wider(names_from = tier_dummy,  values_from = c(share_reelected, share_returned, count))

table7 <- cbind(variable = "overall", table7)

descriptives <- as.data.frame(rbind.data.frame(table1, table2, table3, table4, 
                                               table5, table6, table7))

descriptives$share_reelected_PR[!is.na(descriptives$share_reelected_PR)] <- 
  paste(
    round(100*descriptives$share_reelected_PR[!is.na(descriptives$share_reelected_PR)],0), "%", sep = "")

descriptives$share_reelected_SMD[!is.na(descriptives$share_reelected_SMD)] <- 
  paste(
    round(100*descriptives$share_reelected_SMD[!is.na(descriptives$share_reelected_SMD)],0), "%", sep = "")

descriptives$share_returned_PR[!is.na(descriptives$share_returned_PR)] <- 
  paste(
    round(100*descriptives$share_returned_PR[!is.na(descriptives$share_returned_PR)],0), "%", sep = "")

descriptives$share_returned_SMD[!is.na(descriptives$share_returned_SMD)] <- 
  paste(
    round(100*descriptives$share_returned_SMD[!is.na(descriptives$share_returned_SMD)],0), "%", sep = "")

# Produces table (Appendix B)
print(xtable(descriptives), include.rownames=FALSE)

#### Prep dataset for regression ####

landtag$tier_dummy <- relevel(as.factor(landtag$tier_dummy), ref = "SMD")

# codes preferential voting (PR in Bavaria, post-2011 HH and HB)
landtag$pv <- 0
landtag$pv[landtag$elected_from %in% c("proportional", "substitute") 
           & landtag$state == "BY"] <- 1
landtag$pv[landtag$elected_from %in% c("proportional", "substitute") 
           & landtag$state == "HH" & landtag$legislature %in% c(20, 21, 22)] <- 1
landtag$pv[landtag$elected_from %in% c("proportional", "substitute") 
           & landtag$state == "HB" & landtag$legislature %in% c(18,19,20)] <- 1
lt <- landtag_target %>%
  group_by(state, legislature, party_elected_short) %>%
  summarise(n_seats = n()) %>%
  group_by(state, legislature) %>%
  mutate(total_seats = sum(n_seats),
         share_seats = n_seats/total_seats,
         legislature = as.numeric(as.character(legislature)))

lt2 <- lt %>%
  mutate(legislature= as.numeric(as.character(legislature)) - 1) %>%
  rename(share_seats_t_plus_1 = share_seats,
         n_seats_t_plus_1 = n_seats) %>%
  dplyr::select(state, legislature, party_elected_short, share_seats_t_plus_1, n_seats_t_plus_1) 

lt3 <- left_join(lt, lt2, by = c("legislature", "state", "party_elected_short")) %>%
  mutate(share_seats = ifelse(is.na(share_seats), 0, share_seats),
         share_seats_t_plus_1 = ifelse(is.na(share_seats_t_plus_1), 0, share_seats_t_plus_1),
         n_seats = ifelse(is.na(n_seats), 0, n_seats),
         n_seats_t_plus_1 = ifelse(is.na(n_seats_t_plus_1), 0, n_seats_t_plus_1)) %>%
  mutate(difference_share = share_seats_t_plus_1 - share_seats,
         difference_seats = n_seats_t_plus_1 - n_seats) %>%
  dplyr::select(state, legislature, party_elected_short, difference_share, difference_seats)

landtag <- left_join(landtag %>% 
                       mutate(legislature = as.numeric(as.character(legislature))), 
                     lt3, by = c("legislature", "state", "party_elected_short"))

landtag$index <- paste(landtag$state,landtag$legislature)

#### Regressions ####
# All
df <- landtag %>% filter(in_landtag_end_term == 1)
# MXM only
df2 <- landtag %>% filter(in_landtag_end_term == 1, state %notin% c("HH", "HB", "SL"))

fit_re1 <- glm(data = df, 
               re_elected ~ tier_dummy + seniority + 
                 I(seniority^2) + age_end_legislature +
                 I(age_end_legislature^2) +
                 term_duration + gender + 
                 end_of_term_year + pv + 
                 difference_share + 
                 state*party_elected_short, family = binomial(link = "logit"))
summary(fit_re1)

fit_re2 <- glm(data = df, 
               re_elected ~ tier_dummy + seniority + 
                 I(seniority^2) + age_end_legislature +
                 I(age_end_legislature^2) +
                 term_duration + gender + 
                 end_of_term_year + pv + 
                 difference_seats + 
                 state*party_elected_short, 
               family = binomial(link = "logit"))
summary(fit_re2)

fit_re3 <- glm(data = df2, 
               re_elected ~ tier_dummy + seniority + 
                 I(seniority^2) + age_end_legislature +
                 I(age_end_legislature^2) +
                 term_duration + gender + 
                 end_of_term_year + pv + 
                 difference_seats + 
                 state*party_elected_short, 
               family = binomial(link = "logit"))
summary(fit_re3)

fit_re4 <- glm(data = df, 
               returned ~ tier_dummy + seniority + 
                 I(seniority^2) + age_end_legislature +
                 I(age_end_legislature^2) +
                 term_duration + gender + 
                 end_of_term_year + pv + 
                 difference_share + 
                 state*party_elected_short,
               family = binomial(link = "logit"))
summary(fit_re4)

fit_re5 <- glm(data = df, 
               returned ~ tier_dummy + seniority + 
                 I(seniority^2) + age_end_legislature +
                 I(age_end_legislature^2) +
                 term_duration + gender + 
                 end_of_term_year + pv + 
                 difference_seats + 
                 state*party_elected_short,
               family = binomial(link = "logit"))
summary(fit_re5)

fit_re6 <- glm(data = df2, 
               returned ~ tier_dummy + seniority + 
                 I(seniority^2) + age_end_legislature +
                 I(age_end_legislature^2) +
                 term_duration + gender + 
                 end_of_term_year + pv + 
                 difference_seats + 
                 state*party_elected_short,
               family = binomial(link = "logit"))
summary(fit_re6)

clustered_se1 <- cluster.vcov(fit_re1, landtag %>% filter(in_landtag_end_term == 1) %>% 
                                dplyr::select(state, legislature))
cluster_se_fit_re1 <- coeftest(fit_re1, clustered_se1)
clustered_se2 <- cluster.vcov(fit_re2, landtag %>% filter(in_landtag_end_term == 1) %>% 
                                dplyr::select(state, legislature))
cluster_se_fit_re2 <- coeftest(fit_re2, clustered_se2)
clustered_se3 <- cluster.vcov(fit_re3, landtag %>% filter(in_landtag_end_term == 1,
                                                          state %notin% c("HH", "HB", "SL")) %>% 
                                dplyr::select(state, legislature))
cluster_se_fit_re3 <- coeftest(fit_re3, clustered_se3)
clustered_se4 <- cluster.vcov(fit_re4, landtag %>% filter(in_landtag_end_term == 1) %>% 
                                dplyr::select(state, legislature))
cluster_se_fit_re4 <- coeftest(fit_re4, clustered_se4)
clustered_se5 <- cluster.vcov(fit_re5, landtag %>% filter(in_landtag_end_term == 1) %>% 
                                dplyr::select(state, legislature))
cluster_se_fit_re5 <- coeftest(fit_re5, clustered_se5)
clustered_se6 <- cluster.vcov(fit_re6, landtag %>% filter(in_landtag_end_term == 1,
                                                          state %notin% c("HH", "HB", "SL")) %>% 
                                dplyr::select(state, legislature))
cluster_se_fit_re6 <- coeftest(fit_re6, clustered_se6)

# Produces table 3 in paper
stargazer(cluster_se_fit_re1,cluster_se_fit_re2,cluster_se_fit_re3, digits = 2, single.row = FALSE)
stargazer(cluster_se_fit_re4,cluster_se_fit_re5,cluster_se_fit_re6, digits = 2, single.row = FALSE)

c(round(DescTools::PseudoR2(fit_re1, which = "Nagelkerke"),3),
  round(DescTools::PseudoR2(fit_re2, which = "Nagelkerke"),3),
  round(DescTools::PseudoR2(fit_re3, which = "Nagelkerke"),3),
  round(DescTools::PseudoR2(fit_re4, which = "Nagelkerke"),3),
  round(DescTools::PseudoR2(fit_re5, which = "Nagelkerke"),3),
  round(DescTools::PseudoR2(fit_re6, which = "Nagelkerke"),3))

c(nobs(fit_re1),
  nobs(fit_re2),
  nobs(fit_re3),
  nobs(fit_re4),
  nobs(fit_re5),
  nobs(fit_re6))

# Get marginal effects for figure 3 in paper

me1 <- summary(margins::margins(fit_re1, variables = "tier_dummy",
                                vcov = clustered_se1))
me2 <- summary(margins::margins(fit_re2, variables = "tier_dummy",
                                vcov = clustered_se2))
me3 <- summary(margins::margins(fit_re3, variables = "tier_dummy",
                                vcov = clustered_se3))
me4 <- summary(margins::margins(fit_re4, variables = "tier_dummy",
                                vcov = clustered_se4))
me5 <- summary(margins::margins(fit_re5, variables = "tier_dummy",
                                vcov = clustered_se5))
me6 <- summary(margins::margins(fit_re6, variables = "tier_dummy",
                                vcov = clustered_se6))

AME_data <- rbind.data.frame(as_tibble(me1) %>% dplyr::mutate(model = "Model 1", 
                                                              dv = "Average Marginal Effect of List PR on \n Probability of Being Re-elected to the Landtag"),
                             as_tibble(me2) %>% dplyr::mutate(model = "Model 2", 
                                                              dv = "Average Marginal Effect of List PR on \n Probability of Being Re-elected to the Landtag"),
                             as_tibble(me3) %>% dplyr::mutate(model = "Model 3", 
                                                              dv = "Average Marginal Effect of List PR on \n Probability of Being Re-elected to the Landtag"),
                             as_tibble(me4) %>% dplyr::mutate(model = "Model 4", 
                                                              dv = "Average Marginal Effect of List PR on \n Probability of Being Returned to the Landtag"),
                             as_tibble(me5) %>% dplyr::mutate(model = "Model 5", 
                                                              dv = "Average Marginal Effect of List PR on \n Probability of Being Returned to the Landtag"),
                             as_tibble(me6) %>% dplyr::mutate(model = "Model 6", 
                                                              dv = "Average Marginal Effect of List PR on \n Probability of Being Returned to the Landtag")) %>%
  mutate(model = fct_relevel(model, 
                             "Model 6", "Model 5","Model 4","Model 3","Model 2","Model 1"))

# produces and saves figure 3 in paper

ggplot(data = AME_data, mapping = aes(x = AME, y = model)) + geom_point() + geom_errorbar(aes(xmin=lower,xmax=upper), width = 0) + 
  facet_wrap(~dv, nrow = 2, scales = "free_y") + scale_x_continuous() + theme_minimal() + ylab("") + xlab("Average Marginal Effect") + xlim(-0.19, 0) + 
  geom_vline(xintercept = 0, lty = 2, lwd = 0.25)

ggsave("AME_reelec.png", height = 3.2, width = 8)

